# Load required packages
require(sf)
require(tidyverse)

# Load pre-processed points data
load("00_data/01_data_processed/locations_sites.Rdata")     
load("00_data/01_data_processed/data_epa_analysis.Rdata")

# Filter points with missing coordinates
points <-  points %>% 
  filter(!is.na(LATITUDE83))

# Read shapefile for US states
US_SHAPE <- sf::st_read("00_data/00_data_raw/cb_2018_us_state_5m/cb_2018_us_state_5m.shp", crs = "NAD83")

# Define mapping from EPA region codes to state names
state_to_region <- list(
  `01` = c("Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont"),
  `02` = c("New Jersey", "New York", "Puerto Rico", "U.S. Virgin Islands"),
  `03` = c("Delaware", "District Of Columbia", "Maryland", "Pennsylvania", "Virginia", "West Virginia"),
  `04` = c("Alabama", "Florida", "Georgia", "Kentucky", "Mississippi", "North Carolina", "South Carolina", "Tennessee"),
  `05` = c("Illinois", "Indiana", "Michigan", "Minnesota", "Ohio", "Wisconsin"),
  `06` = c("Arkansas", "Louisiana", "New Mexico", "Oklahoma", "Texas"),
  `07` = c("Iowa", "Kansas", "Missouri", "Nebraska"),
  `08` = c("Colorado", "Montana", "North Dakota", "South Dakota", "Utah", "Wyoming"),
  `09` = c("Arizona", "California", "Hawaii", "Nevada", "American Samoa", "Guam", "Northern Mariana Islands"),
  `10` = c("Alaska", "Idaho", "Oregon", "Washington")
)

# Function to find the EPA region for a given state name
find_region <- function(state_name) {
  formatted_name <- str_to_title(state_name)
  for (region in names(state_to_region)) {
    if (formatted_name %in% state_to_region[[region]]) {
      return(region)
    }
  }
  return(NA)
}

# Assign EPA region to US states
states_df <- US_SHAPE %>%
  mutate(EPA_REGION = sapply(NAME, find_region))

# Dissolve state geometries by EPA region
epa_regions <- states_df %>%
  group_by(EPA_REGION) %>%
  summarize(geometry = st_union(geometry))

# Plot: All points
ggplot(epa_regions) +
  geom_sf(fill="#F0EFF4") +
  geom_point(aes(x=LONGITUDE83, y=LATITUDE83, color=Treatment), data=points, size=0.01) +
  geom_sf(fill=NA, color="black") +
  coord_sf(xlim = c(-130, -60), ylim = c(23, 52)) +
  theme_bw() +
  ylab("") + xlab("") +
  theme(legend.position = c(0.9, 0.2),
        legend.background = element_rect(fill="white", linewidth=0.2, linetype="solid", colour="black")) +
  scale_color_manual(values=c("#0A2463","#FB3640"))

ggsave("03_figures/appendix/figure_a1.pdf", width=10, height=7)
ggsave("03_figures/appendix/figure_a1.png", width=10, height=7)

# Plot: EPA-only points
points_epa <- points[points$PGM_SYS_ID %in% epa_only$PGM_SYS_ID, ] %>%
  select(PGM_SYS_ID, LONGITUDE83, LATITUDE83, Treatment) %>%
  unique()

ggplot(epa_regions) +
  geom_sf(fill="#F0EFF4") +
  geom_point(aes(x=LONGITUDE83, y=LATITUDE83, color=Treatment), data=points_epa, size=0.02, alpha=3) +
  geom_sf(fill=NA, color="black") +
  coord_sf(xlim = c(-130, -60), ylim = c(23, 52)) +
  theme_bw() +
  ylab("") + xlab("") +
  theme(legend.position = c(0.9, 0.2),
        legend.background = element_rect(fill="white", linewidth=0.2, linetype="solid", colour="black")) +
  scale_color_manual(values=c("#0A2463","#FB3640"))

ggsave("03_figures/figure1.pdf", width=10, height=7)
ggsave("03_figures/figure1.jpg", width=10, height=7, dpi=600)

# Inspection counts
load("00_data/01_data_processed/locations_sites.Rdata")     

inspections %>% 
  mutate(n = log(n)) %>% 
  ggplot(aes(x=YEAR,y=n, color=Treatment, fill=Treatment))+
  facet_wrap(~STATE_EPA_FLAG)+
  geom_vline(xintercept = 1994, linetype=2)+
  geom_vline(xintercept = 1999, linetype=2)+
  geom_line()+
  geom_point()+
  theme_bw()+
  scale_color_manual(values = (c("#0A2463","#FB3640")))+
  scale_fill_manual(values = (c("#0A2463","#FB3640")))+
  theme(legend.position = "bottom")+
  scale_x_continuous(breaks = c(1985:2005))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  coord_cartesian(ylim=c(0,15))+
  xlab("Year")+
  ylab("log(Count)")

ggsave("03_figures/appendix/figure_a2.pdf",width=12,height=4)

# Raw inspection count
load("00_data/01_data_processed/inspection_count.Rdata")

inspections %>% 
  mutate(n = log(n)) %>% 
  ggplot(aes(x=YEAR,y=n, color=Treatment, fill=Treatment))+
  facet_wrap(~STATE_EPA_FLAG)+
  geom_vline(xintercept = 1994, linetype=2)+
  geom_vline(xintercept = 1999, linetype=2)+
  geom_line()+
  geom_point()+
  theme_bw()+
  scale_color_manual(values = (c("#0A2463","#FB3640")))+
  scale_fill_manual(values = (c("#0A2463","#FB3640")))+
  theme(legend.position = "bottom")+
  scale_x_continuous(breaks = c(1985:2005))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  coord_cartesian(ylim=c(0,15))+
  xlab("Year")+
  ylab("log(Count)")

ggsave("03_figures/appendix/figure_a2.pdf",width=12,height=4)

rm(list = ls())

